** Start from a clean session: empty dataset in memory, no output paging,
** no leftover user macros, and no log file still open from an earlier run.
clear
set more off
** The original wrote "macro drop all", which only drops a global macro that is
** literally named all. The _all keyword is what clears every user macro.
macro drop _all
capture log close

/********************************************************************************
Discrimination in Multi-Phase Systems: Evidence from Child Protection
Create a Dataset Containing All of the Schools a Student Ever Attended

Created on: 2/26/19

Last Modified on: 2/20/2024

Description: This do file uses the rawsrsd MCER student level data to create a list of
	     all of the schools a student ever attended.
	     
Note that we have removed the file directory names from this program for 
confidentiality reasons.
********************************************************************************/

** Directory globals.
** The actual paths were removed for confidentiality, so each global is an empty
** placeholder that must be filled in before the program is run. In the steps
** visible in this file only intersrsd, geo and cleandata are referenced.
global rawdata   ""
global cleandata ""
global tmp       ""
global intersrsd ""
global geo       ""

/********************************************************************************

There are 2 steps to this file:
1) Dig into raw MCER data to construct student's full school enrollment history
2) Dig into raw MCER data to construct student's full census block history
	
*******************************************************************************/

***********************
***(1) USE INTERMEDIATE MSDS FILE TO CREATE A LOG OF ALL OF THE SCHOOLS A STUDENT 
*** ATTENDED
***********************
** Load the intermediate enrollment file. The reshape below requires it to be
** unique on student (ric) x year x school (bcode).
gzuse $intersrsd/allschools_sec7_pastyears.dta.gz, clear

** Keep rows that have a school code and whose fte + fte_sped is not exactly zero.
** A missing sum is not equal to zero, so rows with a missing FTE component stay.
keep if bcode != .
keep if fte + fte_sped != 0

** Stack the three enrollment-date columns into one row per enrollment date and
** discard the empty slots; the slot index itself is not needed afterwards.
keep ric year bcode enrolldate1 enrolldate2 enrolldate3
reshape long enrolldate, i(ric year bcode) j(slot)
drop slot
keep if enrolldate != .

** Chronological order within student, with bcode as the tie-breaker.
sort ric year enrolldate bcode

**Calculate the # schools attended in that school year (including enrollment in
**2 schools at the same time).
**Note: this is the number of rows in the student*year cell, i.e. one per
**school x enrollment date, so a re-enrollment at the same school counts again.
sort ric year enrolldate bcode
by ric year: gen n_schools=_N
la var n_schools "# Schools Attended in Year"

**Calculate the number of school transitions before the current row.
**Within each student, in chronological order, this is the running count of rows
**whose school differs from the row immediately before it (0 on the first row).
**A single by-group running sum gives the same values as repeatedly propagating
**the count one row at a time, without a pass over the data per iteration.
by ric: gen n_prior_schools=sum(_n>1 & bcode!=bcode[_n-1])

**Spread the value on the first row of each student*year over the whole year
by ric year: gen n_schools_before_year=n_prior_schools[1]
la var n_prior_schools "# School Transitions Before the Current School"
la var n_schools_before_year "# School Transitions Before the Current School Year"

**Calculate the number of school transitions after the current row.
**This is the student's total number of transitions (the running count on the
**last row) minus the transitions that happened up to the current row.
by ric: gen n_later_schools=n_prior_schools[_N]-n_prior_schools

**Spread the value on the last row of each student*year over the whole year
by ric year: gen n_schools_after_year=n_later_schools[_N]
la var n_later_schools "# School Transitions After the Current School"
la var n_schools_after_year "# School Transitions After the Current School Year"

**Write out the enrollment-level file (one row per student x year x school x
**enrollment date), in chronological order within student
sort ric year enrolldate bcode
save "$cleandata/school_list_full.dta", replace

**Reduce to one row per student*year. The three count variables kept here are
**constant within a student*year, so it does not matter which row survives.
keep ric year n_schools n_schools_before_year n_schools_after_year
bysort ric year: keep if _n==1
compress
save "$cleandata/school_list_student_year.dta", replace

***********************
***(2) USE INTERMEDIATE MSDS FILE TO CREATE A LOG OF ALL OF THE NEIGHBORHOODS WHERE A STUDENT 
*** LIVED
***********************

**I don't know the dates that the student lived in each censusblock, but I do know
**the school that they attended when they lived there. And I know the enrollment dates
**at every school. So I should be able to back out some ordering of the neighborhoods

** Build a lookup that is unique on student x year x school, with the census
** blocks recorded for that cell spread across censusblock1, censusblock2, ...
gzuse $geo/msds_censusblocks_pastyears.dta.gz, clear
keep ric year bcode censusblock
** NOTE(review): the order of rows inside a ric-year-bcode cell is not fixed by
** this sort, so which block becomes censusblock1 is arbitrary when a cell has
** more than one block. Confirm this is acceptable.
bysort ric year bcode: gen k=_n
reshape wide censusblock, i(ric year bcode) j(k)
tempfile cb
save "`cb'"

** Attach the census blocks to the enrollment-level school list.
** Enrollment rows with no census-block record are discarded.
** NOTE(review): census-block rows with no enrollment record are kept, and they
** carry a missing enrolldate. Confirm that this is intended.
use "$cleandata/school_list_full.dta", clear
keep ric year bcode enrolldate
merge m:1 ric year bcode using "`cb'", keep(match using) nogenerate

**In practice, it is very rare for a student to have switched census blocks while
**attending the same school (eg. censusblock2 is non-missing for only 2 observations).
**So I just drop it to simplify the problem, as it won't affect anything.
**Keeping the wanted columns, instead of dropping censusblock2 by name, also works
**when no second block exists and when a refreshed extract has a third or more.
keep ric year bcode enrolldate censusblock1
sort ric year enrolldate 

**Calculate the # census blocks lived in during that school year.
**Note: this is the number of rows in the student*year cell (one per school x
**enrollment date), not the number of distinct census blocks.
bysort ric year: gen n_cb=_N
la var n_cb "# Census Blocks in Year"

rename censusblock1 censusblock

**Calculate the number of census block transitions before the current row.
**The sort order is fixed first and the count is then built in one pass as a
**running sum within student. The original put the zero seed on the first row
**before re-sorting with censusblock as a tie-breaker, so a tie on enrolldate
**could move the seed off the first row and leave every count for that student
**missing.
sort ric year enrolldate censusblock
by ric: gen n_prior_cb=sum(_n>1 & censusblock!=censusblock[_n-1])

**Spread the value on the first row of each student*year over the whole year
by ric year: gen n_cb_before_year=n_prior_cb[1]
la var n_prior_cb "# Census Block Transitions Before the Current CB"
la var n_cb_before_year "# Census Block Transitions Before the Current School Year"

**Calculate the number of census block transitions after the current row:
**the student's total number of transitions minus those up to the current row.
by ric: gen n_later_cb=n_prior_cb[_N]-n_prior_cb

**Spread the value on the last row of each student*year over the whole year
by ric year: gen n_cb_after_year=n_later_cb[_N]
la var n_later_cb "# Census Block Transitions After the Current CB"
la var n_cb_after_year "# Census Block Transitions After the Current School Year"

**Write out the row-level file with the census block attached to each
**enrollment record, in chronological order within student
sort ric year enrolldate censusblock
save "$cleandata/censusblock_list_full.dta", replace

**Reduce to one row per student*year. The three count variables kept here are
**constant within a student*year, so it does not matter which row survives.
keep ric year n_cb n_cb_before_year n_cb_after_year
bysort ric year: keep if _n==1
compress
save "$cleandata/cb_list_student_year.dta", replace

